Completed
Push — master ( 328268...f445a7 )
by Elbert
01:07
created

w.analyzeMeta   B

Complexity

Conditions 5
Paths 7

Size

Total Lines 22

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 5
nc 7
nop 2
dl 0
loc 22
rs 8.6737
c 2
b 0
f 0

1 Function

Rating   Name   Duplication   Size   Complexity  
A 0 5 4
1
/**
2
 * Wappalyzer v4
3
 *
4
 * Created by Elbert Alias <[email protected]>
5
 *
6
 * License: GPLv3 http://www.gnu.org/licenses/gpl-3.0.txt
7
 */
8
9
var wappalyzer = (function() {
10
	//'use strict';
11
12
	/**
	 * Application class
	 *
	 * Holds the detection state of one application: the confidence contributed
	 * by each matched pattern, the detected flag and the version strings found.
	 *
	 * @param {string}  app      Application name
	 * @param {boolean} detected Initial detected state (coerced with Boolean)
	 */
	var Application = function(app, detected) {
		this.app             = app;
		this.confidence      = {};
		this.confidenceTotal = 0;
		this.detected        = Boolean(detected);
		this.excludes        = [];
		this.version         = '';
		this.versions        = [];
	};

	Application.prototype = {
		/**
		 * Calculate confidence total
		 *
		 * Sums every entry of this.confidence, caps the sum at 100 and stores it
		 * in this.confidenceTotal.
		 *
		 * @return {number} The capped total
		 */
		getConfidence: function() {
			var total = 0, id, value;

			for ( id in this.confidence ) {
				// Entries may be strings (pattern attributes are parsed from text);
				// adding them unconverted would concatenate instead of sum
				value = Number(this.confidence[id]);

				if ( !isNaN(value) ) {
					total += value;
				}
			}

			return this.confidenceTotal = Math.min(total, 100);
		},

		/**
		 * Resolve version number (find the longest version number that contains all shorter detected version numbers)
		 *
		 * Sorts this.versions by length (in place) and stores the result in this.version.
		 *
		 * @return {string|undefined} The resolved version, or undefined when no version was detected
		 */
		getVersion: function() {
			var i, resolved;

			if ( !this.versions.length ) {
				return;
			}

			this.versions.sort(function(a, b) {
				return a.length - b.length;
			});

			resolved = this.versions[0];

			for ( i = 1; i < this.versions.length; i++ ) {
				// Stop at the first longer version that does not contain the current one
				if ( this.versions[i].indexOf(resolved) === -1 ) {
					break;
				}

				resolved = this.versions[i];
			}

			return this.version = resolved;
		},

		/**
		 * Mark the application as detected by a pattern
		 *
		 * Records the pattern's confidence and, when the pattern carries a version
		 * template, resolves the template against the regex match of value.
		 *
		 * @param {Object} pattern Parsed pattern (regex, optional confidence and version)
		 * @param {string} type    Kind of data the pattern matched ('url', 'html', ...)
		 * @param {string} value   The text the pattern was matched against
		 * @param {string} [key]   Optional sub key (e.g. meta or header name)
		 */
		setDetected: function(pattern, type, value, key) {
			var
				confidence,
				matches,
				version;

			this.detected = true;

			// Set confidence level (stored as a number; a missing or unparseable value counts as 100)
			confidence = pattern.confidence ? Number(pattern.confidence) : 100;

			if ( isNaN(confidence) ) {
				confidence = 100;
			}

			this.confidence[type + ' ' + ( key ? key + ' ' : '' ) + pattern.regex] = confidence;

			// Detect version number
			if ( pattern.version ) {
				version = pattern.version;
				matches = pattern.regex.exec(value);

				if ( matches ) {
					matches.forEach(function(match, i) {
						// Parse ternary operator
						var ternary = new RegExp('\\\\' + i + '\\?([^:]+):(.*)$').exec(version);

						if ( ternary && ternary.length === 3 ) {
							w.log({ match: match, i: i, ternary: ternary });

							version = version.replace(ternary[0], match ? ternary[1] : ternary[2]);

							w.log({ version: version });
						}

						// Replace back references; a replacer function keeps '$' sequences
						// in the matched text from being read as replacement patterns
						version = version.replace(new RegExp('\\\\' + i, 'g'), function() {
							return match ? match : '';
						});
					});

					if ( version && this.versions.indexOf(version) < 0 ) {
						this.versions.push(version);
					}

					this.getVersion();
				}
			}
		}
	};
104
105
	/**
	 * Call driver functions
	 *
	 * @param {string} func Name of the function to call on w.driver
	 * @param {*}      args Single argument handed to the driver function
	 * @return {*} Whatever the driver function returns; undefined when there is
	 *             no driver or the function is not implemented
	 */
	var driver = function(func, args) {
		if ( !w.driver || typeof w.driver[func] !== 'function' ) {
			// w.log() is itself delivered through driver('log'), so a missing log
			// function cannot be reported that way without recursing forever
			if ( func !== 'log' ) {
				w.log('not implemented: w.driver.' + func, 'warn');
			}

			return;
		}

		if ( func !== 'log' ) {
			w.log('w.driver.' + func);
		}

		return w.driver[func](args);
	};
121
122
	/**
	 * Parse apps.json patterns
	 *
	 * Accepts a single pattern string, an array of pattern strings, or an object
	 * whose values are a pattern string or an array of them. Each pattern string
	 * is split on '\;': the first part is the regular expression source, the
	 * remaining parts are 'key:value' attributes.
	 *
	 * The input is not modified.
	 *
	 * @param {string|Array|Object} patterns
	 * @return {Array|Object} An array of parsed patterns for string/array input,
	 *                        otherwise an object with one array per key. Each
	 *                        parsed pattern has `string`, `regex` and any attributes.
	 */
	var parsePatterns = function(patterns) {
		var
			key,
			list,
			parsed = {};

		// Convert array to object containing array
		if ( patterns instanceof Array ) {
			patterns = { main: patterns };
		}

		// Convert string to object containing array containing string
		if ( typeof patterns === 'string' ) {
			patterns = { main: [ patterns ] };
		}

		for ( key in patterns ) {
			parsed[key] = [];

			// Convert string to array containing string (on a local copy, so the
			// caller's object keeps its original shape)
			list = typeof patterns[key] === 'string' ? [ patterns[key] ] : patterns[key];

			list.forEach(function(pattern) {
				var attrs = {};

				pattern.split('\\;').forEach(function(attr, i) {
					if ( i ) {
						// Key value pairs
						attr = attr.split(':');

						if ( attr.length > 1 ) {
							attrs[attr.shift()] = attr.join(':');
						}
					} else {
						attrs.string = attr;

						try {
							// The RegExp constructor needs no slash escaping
							attrs.regex = new RegExp(attr, 'i');
						} catch (e) {
							// Fall back to the empty regular expression and report the bad pattern
							attrs.regex = new RegExp();

							w.log(e + ': ' + attr, 'error');
						}
					}
				});

				parsed[key].push(attrs);
			});
		}

		// Convert back to array if the original pattern list was an array (or string)
		if ( parsed.hasOwnProperty('main') ) {
			parsed = parsed.main;
		}

		return parsed;
	};
183
184
	/**
185
	 * Main script
186
	 */
187
	var w = {
188
		apps:     {},
189
		cats:     null,
190
		ping:     { hostnames: {} },
191
		adCache:  [],
192
		detected: {},
193
194
		config: {
195
			websiteURL: 'https://wappalyzer.com/',
196
			twitterURL: 'https://twitter.com/Wappalyzer',
197
			githubURL:  'https://github.com/AliasIO/Wappalyzer',
198
		},
199
200
		/**
201
		 * Log messages to console
202
		 */
203
		log: function(message, type) {
204
			if ( type === undefined ) {
205
				type = 'debug';
206
			}
207
208
			if ( typeof message === 'object' ) {
209
				message = JSON.stringify(message);
210
			}
211
212
			driver('log', { message: message, type: type });
213
		},
214
215
		/**
216
		 * Initialize
217
		 */
218
		init: function() {
219
			w.log('w.init');
220
221
			// Checks
222
			if ( w.driver === undefined ) {
223
				w.log('no driver, exiting');
224
225
				return;
226
			}
227
228
			// Initialize driver
229
			driver('init');
230
		},
231
232
		/**
233
		 * Analyze the request
234
		 */
235
		analyze: function(hostname, url, data) {
236
			var
237
				app,
238
				apps = {};
239
240
			w.log('w.analyze');
241
242
			if ( w.apps === undefined || w.categories === undefined ) {
243
				w.log('apps.json not loaded, check for syntax errors');
244
245
				return;
246
			}
247
248
			// Remove hash from URL
249
			data.url = url = url.split('#')[0];
250
251
			if ( typeof data.html !== 'string' ) {
252
				data.html = '';
253
			}
254
255
			if ( w.detected[url] === undefined ) {
256
				w.detected[url] = {};
257
			}
258
259
			for ( app in w.apps ) {
260
				apps[app] = w.detected[url] && w.detected[url][app] ? w.detected[url][app] : new Application(app);
261
262
				if ( url ) {
263
					w.analyzeUrl(apps[app], url);
264
				}
265
266
				if ( data.html ) {
267
					w.analyzeHtml(apps[app], data.html);
268
					w.analyzeScript(apps[app], data.html);
269
					w.analyzeMeta(apps[app], data.html);
270
				}
271
272
				if ( data.headers ) {
273
					w.analyzeHeaders(apps[app], data.headers);
274
				}
275
276
				if ( data.env ) {
277
					w.analyzeEnv(apps[app], data.env);
278
				}
279
			}
280
281
			for ( app in apps ) {
282
				if ( !apps[app].detected ) {
283
					delete apps[app];
284
				}
285
			}
286
287
			w.resolveExcludes(apps);
288
			w.resolveImplies(apps, url);
289
290
			w.cacheDetectedApps(apps, url);
291
			w.trackDetectedApps(apps, url, hostname, data.html);
292
293
			w.log(Object.keys(apps).length + ' apps detected: ' + Object.keys(apps).join(', ') + ' on ' + url);
294
295
			driver('displayApps');
296
		},
297
298
		resolveExcludes: function(apps) {
299
			var
300
				app,
301
				excludes = [];
302
303
			// Exclude app in detected apps only
304
			for ( app in apps ) {
305
				if ( w.apps[app].excludes ) {
306
					if ( typeof w.apps[app].excludes === 'string' ) {
307
						w.apps[app].excludes = [ w.apps[app].excludes ];
308
					}
309
310
					w.apps[app].excludes.forEach(function(excluded) {
311
						excludes.push(excluded);
312
					});
313
				}
314
			}
315
316
			// Remove excluded applications
317
			for ( app in apps ) {
318
				if ( excludes.indexOf(app) !== -1 ) {
319
					delete apps[app];
320
				}
321
			}
322
		},
323
324
		resolveImplies: function(apps, url) {
325
			var
326
				confidence,
327
				id,
328
				checkImplies = true;
329
330
			// Implied applications
331
			// Run several passes as implied apps may imply other apps
332
			while ( checkImplies ) {
333
				checkImplies = false;
334
335
				for ( app in apps ) {
336
					confidence = apps[app].confidence;
337
338
					if ( w.apps[app] && w.apps[app].implies ) {
339
						// Cast strings to an array
340
						if ( typeof w.apps[app].implies === 'string' ) {
341
							w.apps[app].implies = [ w.apps[app].implies ];
342
						}
343
344
						w.apps[app].implies.forEach(function(implied) {
345
							implied = parsePatterns(implied)[0];
346
347
							if ( !w.apps[implied.string] ) {
348
								w.log('Implied application ' + implied.string + ' does not exist', 'warn');
349
350
								return;
351
							}
352
353
							if ( !apps.hasOwnProperty(implied.string) ) {
354
								apps[implied.string] = w.detected[url] && w.detected[url][implied.string] ? w.detected[url][implied.string] : new Application(implied.string, true);
355
356
								checkImplies = true;
357
							}
358
359
							// Apply app confidence to implied app
360
							for ( id in confidence ) {
361
								apps[implied.string].confidence[id + ' implied by ' + app] = confidence[id] * ( implied.confidence ? implied.confidence / 100 : 1 );
362
							}
363
						});
364
					}
365
				}
366
			}
367
		},
368
369
		/**
370
		 * Cache detected applications
371
		 */
372
		cacheDetectedApps: function(apps, url) {
373
			var app, id, confidence;
374
375
			for ( app in apps ) {
376
				confidence = apps[app].confidence;
377
378
				// Per URL
379
				w.detected[url][app] = apps[app];
380
381
				for ( id in confidence ) {
382
					w.detected[url][app].confidence[id] = confidence[id];
383
				}
384
			}
385
		},
386
387
		/**
388
		 * Track detected applications
389
		 */
390
		trackDetectedApps: function(apps, url, hostname, html) {
391
			var app, match;
392
393
			for ( app in apps ) {
394
				if ( w.detected[url][app].getConfidence() >= 100 ) {
395
					if ( /(www.)?((.+?)\.(([a-z]{2,3}\.)?[a-z]{2,6}))$/.test(hostname) && !/((local|dev(elopment)?|stag(e|ing)?|test(ing)?|demo(shop)?|admin|google|cache)\.|\/admin|\.local)/.test(url) ) {
396
						if ( !w.ping.hostnames.hasOwnProperty(hostname) ) {
397
							w.ping.hostnames[hostname] = {
398
								applications: {},
399
								meta: {}
400
							};
401
						}
402
403
						if ( !w.ping.hostnames[hostname].applications.hasOwnProperty(app) ) {
404
							w.ping.hostnames[hostname].applications[app] = {
405
								hits: 0
406
							};
407
						}
408
409
						w.ping.hostnames[hostname].applications[app].hits ++;
410
411
						if ( apps[app].version ) {
412
							w.ping.hostnames[hostname].applications[app].version = apps[app].version;
413
						}
414
					} else {
415
						w.log('Ignoring hostname "' + hostname + '"');
416
					}
417
				}
418
			}
419
420
			// Additional information
421
			if ( w.ping.hostnames.hasOwnProperty(hostname) ) {
422
				match = html.match(/<html[^>]*[: ]lang="([a-z]{2}((-|_)[A-Z]{2})?)"/i);
423
424
				if ( match && match.length ) {
425
					w.ping.hostnames[hostname].meta['language'] = match[1];
426
				}
427
			}
428
429
			if ( Object.keys(w.ping.hostnames).length >= 50 || w.adCache.length >= 50 ) {
430
				driver('ping');
431
			}
432
		},
433
434
		/**
435
		 * Analyze URL
436
		 */
437
		analyzeUrl: function(app, url) {
438
			var patterns = parsePatterns(w.apps[app.app].url);
439
440
			if ( patterns.length ) {
441
				patterns.forEach(function(pattern) {
442
					if ( pattern.regex.test(url) ) {
443
						app.setDetected(pattern, 'url', url);
444
					}
445
				});
446
			}
447
		},
448
449
		/**
450
		 * Analyze HTML
451
		 */
452
		analyzeHtml: function(app, html) {
453
			var patterns = parsePatterns(w.apps[app.app].html);
454
455
			if ( patterns.length ) {
456
				patterns.forEach(function(pattern) {
457
					if ( pattern.regex.test(html) ) {
458
						app.setDetected(pattern, 'html', html);
459
					}
460
				});
461
			}
462
		},
463
464
		/**
465
		 * Analyze script tag
466
		 */
467
		analyzeScript: function(app, html) {
468
			var
469
				regex = new RegExp('<script[^>]+src=("|\')([^"\']+)', 'ig'),
470
				patterns = parsePatterns(w.apps[app.app].script);
471
472
			if ( patterns.length ) {
473
				patterns.forEach(function(pattern) {
474
					var match;
475
476
					while ( match = regex.exec(html) ) {
477
						if ( pattern.regex.test(match[2]) ) {
478
							app.setDetected(pattern, 'script', match[2]);
479
						}
480
					}
481
				});
482
			}
483
		},
484
485
		/**
486
		 * Analyze meta tag
487
		 */
488
		analyzeMeta: function(app, html) {
489
			var
490
				content, match, meta,
491
				regex = /<meta[^>]+>/ig,
492
				patterns = parsePatterns(w.apps[app.app].meta);
493
494
			if ( patterns.length ) {
495
				while ( match = regex.exec(html) ) {
496
					for ( meta in patterns ) {
497
						if ( new RegExp('(name|property)=["\']' + meta + '["\']', 'i').test(match) ) {
498
							content = match.toString().match(/content=("|')([^"']+)("|')/i);
499
500
							patterns[meta].forEach(function(pattern) {
501
								if ( content && content.length === 4 && pattern.regex.test(content[2]) ) {
502
									app.setDetected(pattern, 'meta', content[2], meta);
503
								}
504
							});
505
						}
506
					}
507
				}
508
			}
509
		},
510
511
		/**
512
		 * analyze response headers
513
		 */
514
		analyzeHeaders: function(app, headers) {
515
			var
516
				header,
517
				patterns = parsePatterns(w.apps[app.app].headers);
518
519
			if ( patterns.length && headers ) {
520
				for ( header in patterns ) {
521
					patterns[header].forEach(function(pattern) {
522
						header = header.toLowerCase();
523
524
						if ( headers.hasOwnProperty(header) && pattern.regex.test(headers[header]) ) {
525
							app.setDetected(pattern, 'headers', headers[header], header);
526
						}
527
					});
528
				}
529
			}
530
		},
531
532
		/**
533
		 * Analyze environment variables
534
		 */
535
		analyzeEnv: function(app, envs) {
536
			var patterns = parsePatterns(w.apps[app.app].env);
537
538
			if ( patterns.length ) {
539
				patterns.forEach(function(pattern) {
540
					var env;
541
542
					for ( env in envs ) {
543
						if ( pattern.regex.test(envs[env]) ) {
544
							app.setDetected(pattern, 'env', envs[env]);
545
						}
546
					}
547
				});
548
			}
549
		}
550
	};
551
552
	return w;
553
})();
554
555
// Expose the module to CommonJS loaders when an exports object is present
// See http://wiki.commonjs.org/wiki/CommonJS
if ( typeof exports === 'object' ) {
	exports.wappalyzer = wappalyzer;
}
560